Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """Restriction Enzyme classes. 
  12   
  13  Notes about the diverse classes of the restriction enzyme implementation:: 
  14   
  15              RestrictionType is the type of all restriction enzymes. 
  16          ----------------------------------------------------------------------- 
  17              AbstractCut implements some methods that are common to all enzymes. 
  18          ----------------------------------------------------------------------- 
  19              NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  20                                      produced by the enzyme. 
  21                                      they correspond to the 4th field of the 
  22                                      rebase record emboss_e.NNN. 
  23                      0->NoCut    : the enzyme is not characterised. 
  24                      2->OneCut   : the enzyme produces one double strand cut. 
  25                      4->TwoCuts  : two double strand cuts. 
  26          ----------------------------------------------------------------------- 
  27              Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  28                                      the enzyme. 
  29                                      Not implemented yet. 
  30          ----------------------------------------------------------------------- 
  31              Palindromic,            if the site is palindromic or not. 
  32              NotPalindromic          allow some optimisations of the code. 
  33                                      No need to check the reverse strand 
  34                                      with palindromic sites. 
  35          ----------------------------------------------------------------------- 
  36              Unknown, Blunt,         represent the overhang. 
  37              Ov5, Ov3                Unknown is here for symmetry reasons and 
  38                                      correspond to enzymes that are not 
  39                                      characterised in rebase. 
  40          ----------------------------------------------------------------------- 
  41              Defined, Ambiguous,     represent the sequence of the overhang. 
  42              NotDefined 
  43                                      NotDefined is for enzymes not characterised 
  44                                      in rebase. 
  45   
  46                                      Defined correspond to enzymes that display 
  47                                      a constant overhang whatever the sequence. 
  48                                      ex : EcoRI. G^AATTC -> overhang :AATT 
  49                                                  CTTAA^G 
  50   
  51                                      Ambiguous : the overhang varies with the 
  52                                      sequence restricted. 
  53                                      Typically enzymes which cut outside their 
  54                                      restriction site or (but not always) 
  55                                      inside an ambiguous site. 
  56                                      ex: 
  57                                      AcuI CTGAAG(22/20)  -> overhang : NN 
  58                                      AasI GACNNN^NNNGTC  -> overhang : NN 
  59                                           CTGN^NNNNNCAG 
  60   
  61                  note : these 3 classes refer to the overhang not the site. 
  62                     So the enzyme ApoI (RAATTY) is defined even if its 
  63                     restriction site is ambiguous. 
  64   
  65                          ApoI R^AATTY -> overhang : AATT -> Defined 
  66                               YTTAA^R 
  67                     Accordingly, blunt enzymes are always Defined even 
  68                     when they cut outside their restriction site. 
  69          ----------------------------------------------------------------------- 
  70              Not_available,          as found in rebase file emboss_r.NNN files. 
  71              Commercially_available 
  72                                      allow the selection of the enzymes 
  73                                      according to their suppliers to reduce the 
  74                                      quantity of results. 
  75                                      Also will allow the implementation of 
  76                                      buffer compatibility tables. Not 
  77                                      implemented yet. 
  78   
  79                                      the list of suppliers is extracted from 
  80                                      emboss_s.NNN 
  81          ----------------------------------------------------------------------- 
  82   
  83  """ 
  84   
  85  from __future__ import print_function 
  86   
  87  import warnings 
  88   
  89  from Bio._py3k import zip 
  90  from Bio._py3k import filter 
  91  from Bio._py3k import range 
  92   
  93  import re 
  94  import itertools 
  95   
  96  from Bio.Seq import Seq, MutableSeq 
  97  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict 
  98  from Bio.Restriction.Restriction_Dictionary import typedict 
  99  from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict 
 100  from Bio.Restriction.PrintFormat import PrintFormat 
 101  from Bio import BiopythonWarning 
102 103 104 # Used to use Bio.Restriction.DNAUtils.check_bases (and expose it under this 105 # namespace), but have deprecated that module. 106 107 108 -def _check_bases(seq_string):
109 """Check characters in a string (PRIVATE). 110 111 Remove digits and white space present in string. Allows any valid ambiguous 112 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted). 113 114 Other characters (e.g. symbols) trigger a TypeError. 115 116 Returns the string WITH A LEADING SPACE (!). This is for backwards 117 compatibility, and may in part be explained by the fact that 118 ``Bio.Restriction`` doesn't use zero based counting. 119 """ 120 # Remove white space and make upper case: 121 seq_string = "".join(seq_string.split()).upper() 122 # Remove digits 123 for c in "0123456789": 124 seq_string = seq_string.replace(c, "") 125 # Check only allowed IUPAC letters 126 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")): 127 raise TypeError("Invalid character found in %s" % repr(seq_string)) 128 return " " + seq_string
# IUPAC nucleotide code -> string of IUPAC codes associated with it.
# NOTE(review): each value looks like the set of codes sharing at least one
# base with the key (e.g. 'A' -> every code that includes A); confirm against
# the code that consumes this table.
matching = {
    'A': 'ARWMHVDN',
    'C': 'CYSMHBVN',
    'G': 'GRSKBVDN',
    'T': 'TYWKHBDN',
    'R': 'ABDGHKMNSRWV',
    'Y': 'CBDHKMNSTWVY',
    'W': 'ABDHKMNRTWVY',
    'S': 'CBDGHKMNSRVY',
    'M': 'ACBDHMNSRWVY',
    'K': 'BDGHKNSRTWVY',
    'H': 'ACBDHKMNSRTWVY',
    'B': 'CBDGHKMNSRTWVY',
    'V': 'ACBDGHKMNSRWVY',
    'D': 'ABDGHKMNSRTWVY',
    'N': 'ACBDGHKMNSRTWVY',
}

# ``DNA`` is simply another name for ``Bio.Seq.Seq``.
DNA = Seq
class FormattedSeq(object):
    """A linear or circular sequence object for restriction analysis.

    Translates a Bio.Seq into a formatted sequence to be used with
    Restriction.

    Roughly: white space and digits are removed, the letters are checked
    against the IUPAC alphabet, and a space is added in front of the
    sequence to get a biological index instead of a Python index (i.e. the
    index of the first base is 1, not 0).

    Retains information about the shape of the molecule: linear (default)
    or circular.  Restriction sites are also searched across the junction
    of a circular sequence.
    """

    def __init__(self, seq, linear=True):
        """Initialize ``FormattedSeq`` with sequence and topology (optional).

        ``seq`` is either a ``Bio.Seq``, ``Bio.MutableSeq`` or a
        ``FormattedSeq``.  If ``seq`` is a ``FormattedSeq``, ``linear``
        has no effect: the topology of ``seq`` is copied.

        Raises ``TypeError`` for any other type, and (through
        ``_check_bases``) when the sequence holds a non IUPAC character.
        """
        if isinstance(seq, FormattedSeq):
            # Copy constructor: every field, including the topology, comes
            # from the source object.
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        elif isinstance(seq, (Seq, MutableSeq)):
            stringy = str(seq)
            # Remember the case so slices can be handed back in lower case.
            self.lower = stringy.islower()
            # Note this adds a leading space to the sequence (!)
            self.data = _check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Return length of ``FormattedSeq``.

        ``FormattedSeq`` has a leading space, thus subtract 1.
        """
        return len(self.data) - 1

    def __repr__(self):
        """Represent ``FormattedSeq`` class as a string."""
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                repr(self.linear))

    def _cmp_key(self):
        """Return the values that decide equality (PRIVATE).

        The alphabet is compared through its ``repr`` because that is what
        the former ``repr``-based comparison effectively did.
        """
        return (self.data, self.lower, self.linear, self.klass,
                repr(self.alphabet))

    def __eq__(self, other):
        """Implement equality operator for ``FormattedSeq`` object.

        Two objects are equal when they hold the same bases in the same
        case, share the topology and wrap the same sequence class and
        alphabet.  The fields are compared directly rather than through
        ``repr()``, because the ``repr`` of the wrapped sequence class may
        abbreviate long sequences, which made distinct sequences look
        identical.
        """
        if not isinstance(other, FormattedSeq):
            return False
        return self._cmp_key() == other._cmp_key()

    def __ne__(self, other):
        """Implement inequality as the inverse of ``__eq__``.

        Needed explicitly on Python 2, where ``!=`` does not fall back on
        ``__eq__``.
        """
        return not self.__eq__(other)

    def circularise(self):
        """Circularise sequence in place."""
        self.linear = False

    def linearise(self):
        """Linearise sequence in place."""
        self.linear = True

    def to_linear(self):
        """Make a new instance of sequence as linear."""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """Make a new instance of sequence as circular."""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """Return if sequence is linear (True) or circular (False)."""
        return self.linear

    def finditer(self, pattern, size):
        """Return a list of a given pattern which occurs in the sequence.

        The list is made of tuples (location, pattern.group).
        The latter is used with non palindromic sites.
        ``pattern`` is the regular expression pattern corresponding to the
        enzyme restriction site.
        ``size`` is the size of the restriction enzyme recognition site.
        """
        if self.is_linear():
            data = self.data
        else:
            # Repeat the first size-1 bases after the end so that sites
            # spanning the junction of a circular molecule can match.
            data = self.data + self.data[1:size]
        return [(i.start(), i.group) for i in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """Return substring of ``FormattedSeq``.

        The class of the returned object is the class of the respective
        sequence. Note that due to the leading space, indexing is 1-based:

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction.Restriction import FormattedSeq
        >>> f_seq = FormattedSeq(Seq('ATGCATGC'))
        >>> f_seq[1]
        Seq('A')

        """
        chunk = self.data[i]
        if self.lower:
            # The data are stored upper-cased; restore the original case.
            chunk = chunk.lower()
        return self.klass(chunk, self.alphabet)
256
class RestrictionType(type):
    """RestrictionType. Type from which all enzyme classes are derived.

    Implement the operator methods.
    """

    def __init__(cls, name='', bases=(), dct=None):
        """Initialize RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        Raises ``ValueError`` if ``name`` contains a hyphen or if the
        ``compsite`` attribute cannot be compiled as a regular expression.
        """
        if "-" in name:
            raise ValueError("Problem with hyphen in %s as enzyme name"
                             % repr(name))
        # 2011/11/26 - type.__init__ is deliberately not chained here:
        # nobody knows what that call was supposed to accomplish and all
        # unit tests pass without it.
        try:
            cls.compsite = re.compile(cls.compsite)
        except AttributeError:
            # Can happen if initialised wrongly.
            # (This was seen when Sphinx api-doc imports the classes, and
            # tried to automatically generate documentation for them)
            pass
        except Exception:
            raise ValueError("Problem with regular expression, re.compiled(%s)"
                             % repr(cls.compsite))

    def __add__(cls, other):
        """Add restriction enzyme to a RestrictionBatch().

        If other is an enzyme returns a batch of the two enzymes.
        If other is already a RestrictionBatch add enzyme to it.
        Any other operand raises ``TypeError``.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        elif isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        else:
            raise TypeError

    def __div__(cls, other):
        """Override '/' operator to use as search method.

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction import EcoRI
        >>> EcoRI/Seq('GAATTC')
        [2]

        Returns RE.search(other).
        """
        return cls.search(other)

    def __rdiv__(cls, other):
        """Override division with reversed operands to use as search method.

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction import EcoRI
        >>> Seq('GAATTC')/EcoRI
        [2]

        Returns RE.search(other).
        """
        return cls.search(other)

    def __truediv__(cls, other):
        """Override Python 3 division operator to use as search method.

        Like __div__.
        """
        return cls.search(other)

    def __rtruediv__(cls, other):
        """As __truediv__, with reversed operands.

        Like __rdiv__.
        """
        return cls.search(other)

    def __floordiv__(cls, other):
        """Override '//' operator to use as catalyse method.

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction import EcoRI
        >>> EcoRI//Seq('GAATTC')
        (Seq('G'), Seq('AATTC'))

        Returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """As __floordiv__, with reversed operands.

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction import EcoRI
        >>> Seq('GAATTC')//EcoRI
        (Seq('G'), Seq('AATTC'))

        Returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __str__(cls):
        """Return the name of the enzyme as string."""
        return cls.__name__

    def __repr__(cls):
        """Implement repr method.

        Used with eval or exec will instantiate the enzyme.
        """
        return "%s" % cls.__name__

    def __len__(cls):
        """Return length of recognition site of enzyme as int."""
        try:
            return cls.size
        except AttributeError:
            # Happens if the instance was not initialised as expected.
            # e.g. if instance created by a documentation framework
            # like Sphinx trying to inspect the class automatically,
            # Also seen within IPython.
            return 0

    def __hash__(cls):
        """Implement ``hash()`` method for ``RestrictionType``.

        Python default is to use ``id(...)``
        This is consistent with the ``__eq__`` implementation
        """
        return id(cls)

    def __eq__(cls, other):
        """Override '==' operator.

        True if RE and other are the same enzyme.

        Specifically this checks they are the same Python object.
        """
        return id(cls) == id(other)

    def __ne__(cls, other):
        """Override '!=' operator.

        Isoschizomer strict (same recognition site, same restriction) -> False
        All the other-> True

        WARNING - This is not the inverse of the __eq__ method

        >>> from Bio.Restriction import SacI, SstI
        >>> SacI != SstI  # true isoschizomers
        False
        >>> SacI == SstI
        False
        """
        if not isinstance(other, RestrictionType):
            return True
        return cls.charac != other.charac

    def __rshift__(cls, other):
        """Override '>>' operator to test for neoschizomers.

        neoschizomer : same recognition site, different restriction. -> True
        all the others :                                             -> False

        >>> from Bio.Restriction import SmaI, XmaI
        >>> SmaI >> XmaI
        True
        """
        if not isinstance(other, RestrictionType):
            return False
        return cls.site == other.site and cls.charac != other.charac

    def __mod__(cls, other):
        """Override '%' operator to test for compatible overhangs.

        True if a and b have compatible overhang.
        Raises ``TypeError`` if other is not an enzyme.

        >>> from Bio.Restriction import XhoI, SalI
        >>> XhoI % SalI
        True
        """
        if not isinstance(other, RestrictionType):
            raise TypeError('expected RestrictionType, got %s instead'
                            % type(other))
        return cls._mod1(other)

    # The four ordering operators below share one sort key:
    # (size of the recognition site, enzyme name).  ``len()`` is used for
    # the size everywhere so that a class lacking ``size`` (see __len__)
    # sorts as size 0 instead of raising AttributeError in some operators
    # only.  A non-enzyme operand raises NotImplementedError, as before.

    def __ge__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>='. a is greater or equal than b if the a site is longer
        than b site. If their site have the same length sort by alphabetical
        order of their names.

        >>> from Bio.Restriction import EcoRI, EcoRV
        >>> EcoRI.size
        6
        >>> EcoRV.size
        6
        >>> EcoRI >= EcoRV
        False
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) >= (len(other), other.__name__)

    def __gt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '>'. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) > (len(other), other.__name__)

    def __le__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<='. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) <= (len(other), other.__name__)

    def __lt__(cls, other):
        """Compare length of recognition site of two enzymes.

        Override '<'. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) < (len(other), other.__name__)
528
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated.
    """

    @classmethod
    def search(cls, dna, linear=True):
        """Return a list of cutting sites of the enzyme in the sequence.

        Compensate for circular sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance
        (or an already prepared FormattedSeq, in which case ``linear`` is
        ignored).

        If linear is False, the restriction sites that span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.
        """
        #
        # Separating search from _search allow a (very limited) optimisation
        # of the search when using a batch of restriction enzymes.
        # in this case the DNA is tested once by the class which implements
        # the batch instead of being tested by each enzyme single.
        # see RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        # _search() reads the sequence from the class attribute.
        cls.dna = dna
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """Print all the suppliers of restriction enzyme."""
        supply = sorted(x[0] for x in suppliers_dict.values())
        print(",\n".join(supply))

    @classmethod
    def is_equischizomer(cls, other):
        """Test for real isoschizomer.

        True if other is an isoschizomer of RE, but not an neoschizomer,
        else False.

        Equischizomer: same site, same position of restriction.

        >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
        >>> SacI.is_equischizomer(SstI)
        True
        >>> SmaI.is_equischizomer(XmaI)
        False

        """
        # NB: '!=' on enzymes compares their characteristics; it is not
        # the inverse of '==' (which tests identity).
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """Test for neoschizomer.

        True if other is a neoschizomer of RE, else False.
        Neoschizomer: same site, different position of restriction.
        """
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """Test for same recognition site.

        True if other has the same recognition site, else False.

        Isoschizomer: same site.

        >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
        >>> SacI.is_isoschizomer(SstI)
        True
        >>> SmaI.is_isoschizomer(XmaI)
        True

        """
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """List equischizomers of the enzyme.

        Return a sorted list of all the equischizomers of RE, RE itself
        excluded.
        If batch is supplied it is used instead of the default AllEnzymes;
        the enzyme does not have to be a member of that batch.

        Equischizomer: same site, same position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        # Skip the enzyme itself by identity rather than deleting it by
        # index afterwards: the latter raised ValueError whenever the
        # supplied batch did not contain the enzyme.
        return sorted(x for x in batch if x is not cls and not cls != x)

    @classmethod
    def neoschizomers(cls, batch=None):
        """List neoschizomers of the enzyme.

        Return a sorted list of all the neoschizomers of RE.
        If batch is supplied it is used instead of the default AllEnzymes.

        Neoschizomer: same site, different position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        return sorted(x for x in batch if cls >> x)

    @classmethod
    def isoschizomers(cls, batch=None):
        """List all isoschizomers of the enzyme.

        Return a sorted list of all the equischizomers and neoschizomers
        of RE, RE itself excluded.
        If batch is supplied it is used instead of the default AllEnzymes;
        the enzyme does not have to be a member of that batch.
        """
        if not batch:
            batch = AllEnzymes
        # See equischizomers() for why the enzyme is filtered by identity.
        return sorted(x for x in batch
                      if x is not cls and ((cls >> x) or (not cls != x)))

    @classmethod
    def frequency(cls):
        """Return the theoretically cutting frequency of the enzyme.

        Frequency of the site, given as 'one cut per x bases' (int).
        """
        return cls.freq
670
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These are generally enzymes that have been only partially characterised
    and whose way of cutting the DNA is unknown, or enzymes whose cutting
    pattern is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Their catalyse() method is not usable (``Unknown.catalyse`` raises an
    exception).

    Unknown and NotDefined are also part of the base classes of these enzymes.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Tell whether the enzyme cuts once on each strand: always False."""
        return False

    @classmethod
    def cut_twice(cls):
        """Tell whether the enzyme cuts twice on each strand: always False."""
        return False

    @classmethod
    def _modify(cls, location):
        """Yield the reported position for a site match (PRIVATE).

        For internal use only.

        ``location`` is the integer position at which the enzyme pattern
        matched.  Cutting enzymes shift it to the real cutting position;
        as nothing is known about the cut here, the location is yielded
        unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """Yield the reported position for a minus strand match (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic.  The location is yielded unchanged.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:

        - fst5 -> first 5' cut (current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd3 -> second 3' cut (complementary strand) or None
        - site -> recognition site.

        All four cut positions are None for these enzymes.
        """
        return (None,) * 4 + (cls.site,)
759
class OneCut(AbstractCut):
    """Implement the methods for enzymes that cut the DNA only once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Tell whether the enzyme cuts once on each strand: always True."""
        return True

    @classmethod
    def cut_twice(cls):
        """Tell whether the enzyme cuts twice on each strand: always False."""
        return False

    @classmethod
    def _modify(cls, location):
        """Yield the cutting position for a site match (PRIVATE).

        For internal use only.

        ``location`` is the integer position at which the enzyme pattern
        matched; the cut is reported at ``location + fst5``.

        Example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        """
        cut = location + cls.fst5
        yield cut

    @classmethod
    def _rev_modify(cls, location):
        """Yield the cutting position for a minus strand match (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic: the cut is reported at
        ``location - fst3``.
        """
        cut = location - cls.fst3
        yield cut

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:

        - fst5 -> first 5' cut (current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd3 -> second 3' cut (complementary strand) or None
        - site -> recognition site.

        The two second-cut entries are None for these enzymes.
        """
        first_cuts = (cls.fst5, cls.fst3)
        return first_cuts + (None, None, cls.site)
838
class TwoCuts(AbstractCut):
    """Implement the methods for enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """Tell whether the enzyme cuts once on each strand: always False."""
        return False

    @classmethod
    def cut_twice(cls):
        """Tell whether the enzyme cuts twice on each strand: always True."""
        return True

    @classmethod
    def _modify(cls, location):
        """Yield both cutting positions for a site match (PRIVATE).

        For internal use only.

        ``location`` is the integer position at which the enzyme pattern
        matched.  Two positions are yielded, in this order:
        ``location + fst5`` then ``location + scd5``.
        """
        first = location + cls.fst5
        yield first
        second = location + cls.scd5
        yield second

    @classmethod
    def _rev_modify(cls, location):
        """Yield both cutting positions for a minus strand match (PRIVATE).

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic.  Two positions are yielded, in this
        order: ``location - fst3`` then ``location - scd3``.
        """
        first = location - cls.fst3
        yield first
        second = location - cls.scd3
        yield second

    @classmethod
    def characteristic(cls):
        """Return the enzyme's characteristics as a tuple.

        The tuple contains the attributes:

        - fst5 -> first 5' cut (current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd3 -> second 3' cut (complementary strand) or None
        - site -> recognition site.

        """
        return (cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site)
919
class Meth_Dep(AbstractCut):
    """Carry the methylation information of an enzyme.

    Enzymes of this class possess a site which is methylable.
    """

    @classmethod
    def is_methylable(cls):
        """Tell whether the recognition site can be methylated.

        Always True for enzymes of this class.
        """
        return True
934
class Meth_Undep(AbstractCut):
    """Carry the methylation information of an enzyme.

    Enzymes of this class are not sensitive to methylation.
    """

    @classmethod
    def is_methylable(cls):
        """Tell whether the recognition site can be methylated.

        Always False for enzymes of this class.
        """
        return False
949
class Palindromic(AbstractCut):
    """Implement methods for enzymes with palindromic recognition sites.

    Palindromic means: the recognition site and its reverse complement are
    identical.
    Remark: an enzyme with a site CGNNCG is palindromic even if some of the
    sites that it will recognise are not, for example CGAACG.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """Return a list of cutting sites of the enzyme (PRIVATE).

        For internal use only.

        Search implementation for palindromic enzymes: the sequence stored
        in ``cls.dna`` is scanned once and every match is converted into
        its cutting position(s) by ``_modify``.  The list is stored in
        ``cls.results`` and, when it is not empty, handed to ``_drop()``
        before being returned.
        """
        hits = cls.dna.finditer(cls.compsite, cls.size)
        cuts = []
        # The match group accessor is only needed for non palindromic
        # sites, so it is ignored here.
        for start, _group in hits:
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            cls._drop()
        # _drop() works on cls.results, so return the attribute rather
        # than the local list.
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """Tell whether the recognition site is palindromic: always True."""
        return True
981
class NonPalindromic(AbstractCut):
    """Implement methods for enzymes with non-palindromic recognition sites.

    Palindromic means: the recognition site and its reverse complement are
    identical.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """Return a list of cutting sites of the enzyme (PRIVATE).

        For internal use only.

        Search implementation for non palindromic enzymes.  A match whose
        group named after the enzyme is set is converted with ``_modify``;
        any other match is converted with ``_rev_modify`` and also
        recorded in ``cls.on_minus``.  The combined, sorted list is stored
        in ``cls.results`` and, when it is not empty, handed to
        ``_drop()`` before being returned.
        """
        hits = cls.dna.finditer(cls.compsite, cls.size)
        # The local names are bound to the very lists stored on the class.
        cls.results = plus_cuts = []
        to_plus = cls._modify
        to_minus = cls._rev_modify
        enzyme_name = str(cls)
        cls.on_minus = minus_cuts = []

        for start, group in hits:
            if group(enzyme_name):
                plus_cuts.extend(to_plus(start))
            else:
                minus_cuts.extend(to_minus(start))
        plus_cuts.extend(minus_cuts)

        if plus_cuts:
            plus_cuts.sort()
            cls._drop()
        # _drop() works on cls.results, so return the attribute rather
        # than the local list.
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """Tell whether the recognition site is palindromic: always False."""
        return False
1023
class Unknown(AbstractCut):
    """Implement methods for enzymes that produce unknown overhangs.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        The cut of these enzymes is unknown, so no fragments can be computed
        and the method always raises.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
        If linear is False, the sequence is considered to be circular.

        Raises NotImplementedError unconditionally.
        """
        raise NotImplementedError('%s restriction is unknown.'
                                  % cls.__name__)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends (always False here).

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzyme produces 5' overhanging ends (always False).

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends (always False).

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class always returns 'unknown'.
        """
        return 'unknown'

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        ``batch`` is accepted (and ignored) so the signature matches the
        ``compatible_end(batch=None)`` of Blunt, Ov5 and Ov3; an enzyme with
        an unknown overhang is compatible with nothing, so the result is
        always an empty list.
        """
        return []

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Always False: an unknown overhang is never reported as compatible.
        """
        return False
1116
class Blunt(AbstractCut):
    """Implement methods for enzymes that produce blunt ends.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        seq = cls.dna
        # NOTE(review): slices start at index 1, presumably because cls.dna
        # uses 1-based positions - confirm against the sequence wrapper.
        if not cuts:
            return (seq[1:],)
        # Pieces lying between two consecutive cut positions.
        inner = [seq[left:right] for left, right in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first site, inner pieces, last site -> END.
            return tuple([seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]])
        # Circular: the piece after the last site is joined to the piece
        # before the first site.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + inner)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends (always True here).

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_5overhang(cls):
        """Return if the enzyme produces 5' overhanging ends (always False).

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends (always False).

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class always returns 'blunt'.
        """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        Only the enzymes of ``batch`` are considered.  When ``batch`` is
        None or empty, ``AllEnzymes`` is searched instead.  Returns a sorted
        list of the enzymes for which ``is_blunt()`` is true.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate the requested batch (the argument used to be ignored).
        return sorted(x for x in iter(batch) if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Blunt ends are compatible with any other Blunt enzyme.
        """
        return issubclass(other, Blunt)
1246
class Ov5(AbstractCut):
    """Implement methods for enzymes that produce 5' overhanging ends.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        seq = cls.dna
        # NOTE(review): slices start at index 1, presumably because cls.dna
        # uses 1-based positions - confirm against the sequence wrapper.
        if not cuts:
            return (seq[1:],)
        # Pieces lying between two consecutive cut positions.
        inner = [seq[left:right] for left, right in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first site, inner pieces, last site -> END.
            return tuple([seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]])
        # Circular: the piece after the last site is joined to the piece
        # before the first site.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + inner)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends (always False here).

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzyme produces 5' overhanging ends (always True).

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends (always False).

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class always returns "5' overhang".
        """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        Only the enzymes of ``batch`` are considered.  When ``batch`` is
        None or empty, ``AllEnzymes`` is searched instead.  Returns a sorted
        list of the 5' overhang enzymes ``x`` for which ``x % cls`` is true.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate the requested batch (the argument used to be ignored).
        return sorted(x for x in iter(batch)
                      if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Only another Ov5 enzyme can be compatible; the overhang comparison
        itself is delegated to ``cls._mod2``.
        """
        if issubclass(other, Ov5):
            return cls._mod2(other)
        return False
1379
class Ov3(AbstractCut):
    """Implement methods for enzymes that produce 3' overhanging ends.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        seq = cls.dna
        # NOTE(review): slices start at index 1, presumably because cls.dna
        # uses 1-based positions - confirm against the sequence wrapper.
        if not cuts:
            return (seq[1:],)
        # Pieces lying between two consecutive cut positions.
        inner = [seq[left:right] for left, right in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first site, inner pieces, last site -> END.
            return tuple([seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]])
        # Circular: the piece after the last site is joined to the piece
        # before the first site.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + inner)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """Return if the enzyme produces blunt ends (always False here).

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_5overhang(cls):
        """Return if the enzyme produces 5' overhanging ends (always False).

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_3overhang(cls):
        """Return if the enzyme produces 3' overhanging ends (always True).

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def overhang(cls):
        """Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        This class always returns "3' overhang".
        """
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """List all enzymes that produce compatible ends for the enzyme.

        Only the enzymes of ``batch`` are considered.  When ``batch`` is
        None or empty, ``AllEnzymes`` is searched instead.  Returns a sorted
        list of the 3' overhang enzymes ``x`` for which ``x % cls`` is true.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate the requested batch (the argument used to be ignored).
        return sorted(x for x in iter(batch)
                      if x.is_3overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Only another Ov3 enzyme can be compatible; the overhang comparison
        itself is delegated to ``cls._mod2``.
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if issubclass(other, Ov3):
            return cls._mod2(other)
        return False
1515
class Defined(AbstractCut):
    """Implement methods for enzymes with defined recognition site and cut.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Linear sequence: leading positions <= 1 are discarded, then the list
        is truncated at the first position greater than ``len(cls.dna)``.
        Circular sequence: leading positions < 1 get the sequence length
        added and trailing positions beyond the length get it subtracted.
        """
        # Cut positions are derived from the site position plus the distance
        # to the cut (see _modify / _rev_modify), so they can fall outside
        # the sequence.  For linear DNA such cuts are removed; for circular
        # DNA the site really is in the sequence, so the index is wrapped.
        size = len(cls.dna)
        if cls.dna.is_linear():
            tail = itertools.dropwhile(lambda pos: pos <= 1, cls.results)
            cls.results = list(
                itertools.takewhile(lambda pos: pos <= size, tail))
            return
        cuts = cls.results
        for i, pos in enumerate(cuts):
            if pos >= 1:
                break
            cuts[i] += size
        for back, pos in enumerate(cuts[::-1], 1):
            if pos <= size:
                break
            cuts[-back] -= size
        return

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always True for this class.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        """
        return False

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        The cut on the (+) strand is shown as '^' and the cut on the (-)
        strand as '_', e.g.:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        """
        plus_cut = cls.fst5
        minus_cut = cls.fst3
        site = cls.site
        if cls.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if cls.is_5overhang():
            if plus_cut == minus_cut == 0:
                return 'N^' + site + '_N'
            if minus_cut == 0:
                return site[:plus_cut] + '^' + site[plus_cut:] + '_N'
            return (site[:plus_cut] + '^' + site[plus_cut:minus_cut]
                    + '_' + site[minus_cut:])
        if cls.is_blunt():
            return site[:plus_cut] + '^_' + site[plus_cut:]
        # Remaining case: 3' overhang.
        if plus_cut == minus_cut == 0:
            return 'N_' + site + '^N'
        return (site[:minus_cut] + '_' + site[minus_cut:plus_cut]
                + '^' + site[plus_cut:])

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Equal ``ovhgseq`` means compatible.  Otherwise, if ``other`` is an
        Ambiguous enzyme the decision is delegated to ``other._mod2(cls)``;
        any other case is incompatible.
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            return other._mod2(cls)
        return False
1670
class Ambiguous(AbstractCut):
    """Implement methods for enzymes that produce variable overhangs.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.

    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.

    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Linear sequence: leading positions <= 1 are discarded, then the list
        is truncated at the first position greater than ``len(cls.dna)``.
        Circular sequence: leading positions < 1 get the sequence length
        added and trailing positions beyond the length get it subtracted.
        """
        length = len(cls.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if cls.dna.is_linear():
            cls.results = list(drop(lambda x: x <= 1, cls.results))
            cls.results = list(take(lambda x: x <= length, cls.results))
        else:
            for index, location in enumerate(cls.results):
                if location < 1:
                    cls.results[index] += length
                else:
                    break
            for index, location in enumerate(cls.results[::-1]):
                if location > length:
                    cls.results[-(index + 1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always False for this class.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always True for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        """
        return True

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        """
        return False

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Overhangs of different length are never compatible.  Otherwise
        ``cls.ovhgseq`` is turned into a regular expression ('N' becomes
        '.', each of 'RYWMSKHDBV' becomes a character class built from
        ``matching``) and matched against ``other.ovhgseq``.
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq):
            return False
        pattern = cls.ovhgseq
        for base in cls.ovhgseq:
            if base in 'N':
                pattern = pattern.replace('N', '.')
            if base in 'RYWMSKHDBV':
                pattern = pattern.replace(base, '[' + matching[base] + ']')
        return bool(re.match(pattern, other.ovhgseq))

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        The cut on the (+) strand is shown as '^' and the cut on the (-)
        strand as '_', e.g.:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        Raises ValueError for a blunt cutter whose cut lies inside the site
        (0 <= fst5 <= len(cls)).
        """
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        # The result is kept in ``rep`` (not ``re``) so the local name does
        # not shadow the regular expression module used by _mod2.
        if cls.cut_twice():
            rep = 'cut twice, not yet implemented sorry.'
        elif cls.is_5overhang():
            if f3 == f5 == 0:
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
            elif 0 <= f3 + length <= length:
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3 + length < 0:
                # Was "'N^' * abs(f5) * 'N'", which multiplies two strings
                # and raises TypeError; concatenation is what the sibling
                # branches do.
                rep = ('N^' + abs(f5) * 'N' + '_'
                       + abs(length + f3) * 'N' + site)
            elif f5 > length:
                rep = (site + (f5 - length) * 'N' + '^'
                       + (length + f3 - f5) * 'N' + '_N')
            else:
                rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
        elif cls.is_blunt():
            if f5 < 0:
                rep = 'N^_' + abs(f5) * 'N' + site
            elif f5 > length:
                rep = site + (f5 - length) * 'N' + '^_N'
            else:
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (cls.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    rep = 'N_' + site + '^N'
                else:
                    rep = site + '_' + (f5 - length) * 'N' + '^N'
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3 + length <= length:
                rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
            elif 0 <= f5 <= length:
                rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
            elif f3 > 0:
                rep = (site + f3 * 'N' + '_'
                       + (f5 - f3 - length) * 'N' + '^N')
            elif f5 < 0:
                rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^'
                       + abs(f5) * 'N' + site)
            else:
                rep = ('N_' + abs(f3 + length) * 'N' + site
                       + (f5 - length) * 'N' + '^N')
        return rep
1860
class NotDefined(AbstractCut):
    """Implement methods for enzymes with non-characterized overhangs.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """Remove cuts that are outside of the sequence (PRIVATE).

        For internal use only.

        Nothing is done for a linear sequence.  For a circular sequence,
        leading positions < 1 get the sequence length added and trailing
        positions beyond the length get it subtracted.
        """
        if cls.dna.is_linear():
            return
        length = len(cls.dna)
        for index, location in enumerate(cls.results):
            if location < 1:
                cls.results[index] += length
            else:
                break
        # Walk the list from its end ([::-1], as Defined and Ambiguous do).
        # The former [:-1] slice walked it forwards, so the position tested
        # was not the one rewritten through the negative index below.
        for index, location in enumerate(cls.results[::-1]):
            if location > length:
                cls.results[-(index + 1)] -= length
            else:
                break
        return

    @classmethod
    def is_defined(cls):
        """Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always False for this class.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always False for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        """
        return False

    @classmethod
    def is_unknown(cls):
        """Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always True for this class.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        """
        return True

    @classmethod
    def _mod2(cls, other):
        """Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Always raises ValueError: the overhang is not defined, so no
        compatibility can be computed.
        """
        #
        # Normally we should not arrive here. But well better safe than
        # sorry.
        # the overhang is not defined we are compatible with nobody.
        # could raise an Error may be rather than return quietly.
        #
        # return False
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(cls), str(other), str(cls)))

    @classmethod
    def elucidate(cls):
        """Return a string representing the recognition site and cuttings.

        The cut positions are not known for these enzymes, so the site is
        returned surrounded by question marks, e.g.:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        """
        return '? %s ?' % cls.site
1981
class Commercially_available(AbstractCut):
    """Implement methods for enzymes which are commercially available.

    Internal use only. Not meant to be instantiated.
    """

    #
    # Recent addition to Rebase make this naming convention uncertain.
    # May be better to says enzymes which have a supplier.
    #

    @classmethod
    def suppliers(cls):
        """Print the suppliers of the enzyme, one per line.

        Each code in ``cls.suppl`` is looked up in ``suppliers_dict`` and the
        first item of its entry is printed followed by a comma.
        """
        for code in cls.suppl:
            entry = suppliers_dict[code]
            print(entry[0] + ',')
        return None

    @classmethod
    def supplier_list(cls):
        """Return a list of suppliers of the enzyme.

        The list holds the first item of every ``suppliers_dict`` entry whose
        key is in ``cls.suppl``, in the iteration order of the dictionary.
        """
        names = []
        for code, entry in suppliers_dict.items():
            if code in cls.suppl:
                names.append(entry[0])
        return names

    @classmethod
    def buffers(cls, supplier):
        """Return the recommended buffer of the supplier for this enzyme.

        Not implemented yet: always returns None.
        """
        return None

    @classmethod
    def is_comm(cls):
        """Return if enzyme is commercially available (always True here).

        True if RE has suppliers.
        """
        return True
2021
class Not_available(AbstractCut):
    """Implement methods for enzymes which are not commercially available.

    Internal use only. Not meant to be instantiated.
    """

    @staticmethod
    def suppliers():
        """Print a list of suppliers of the enzyme.

        There is no supplier, so nothing is printed and None is returned.
        """
        return None

    @classmethod
    def supplier_list(cls):
        """Return a list of suppliers of the enzyme (always empty here)."""
        return list()

    @classmethod
    def buffers(cls, supplier):
        """Return the recommended buffer of the supplier for this enzyme.

        Not implemented yet.  Always raises TypeError because the enzyme has
        no supplier.
        """
        message = "Enzyme not commercially available."
        raise TypeError(message)

    @classmethod
    def is_comm(cls):
        """Return if enzyme is commercially available (always False here).

        True if RE has suppliers.
        """
        return False
2054
2055 2056 ############################################################################### 2057 # # 2058 # Restriction Batch # 2059 # # 2060 ############################################################################### 2061 2062 2063 -class RestrictionBatch(set):
2064 """Class for operations on more than one enzyme.""" 2065
2066 - def __init__(self, first=(), suppliers=()):
2067 """Initialize empty RB or pre-fill with enzymes (from supplier).""" 2068 first = [self.format(x) for x in first] 2069 first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]] 2070 set.__init__(self, first) 2071 self.mapping = dict.fromkeys(self) 2072 self.already_mapped = None 2073 self.suppliers = [x for x in suppliers if x in suppliers_dict]
2074
2075 - def __str__(self):
2076 """Return a readable representation of the ``RestrictionBatch``.""" 2077 if len(self) < 5: 2078 return '+'.join(self.elements()) 2079 else: 2080 return '...'.join(('+'.join(self.elements()[:2]), 2081 '+'.join(self.elements()[-2:])))
2082
2083 - def __repr__(self):
2084 """Represent ``RestrictionBatch`` class as a string for debugging.""" 2085 return 'RestrictionBatch(%s)' % self.elements()
2086
def __contains__(self, other):
    """Implement ``in`` for ``RestrictionBatch``.

    ``other`` is first normalised with ``self.format``; anything that
    format rejects with ValueError is reported as not contained.
    """
    try:
        enzyme = self.format(other)
    except ValueError:  # other is not a restriction enzyme
        return False
    else:
        return set.__contains__(self, enzyme)
2094
def __div__(self, other):
    """Override '/' operator to use as search method.

    ``batch / other`` returns ``batch.search(other)``.
    """
    found = self.search(other)
    return found
2098
def __rdiv__(self, other):
    """Override division with reversed operands to use as search method.

    ``other / batch`` returns ``batch.search(other)``.
    """
    found = self.search(other)
    return found
2102
def __truediv__(self, other):
    """Override Python 3 division operator to use as search method.

    Like __div__: ``batch / other`` returns ``batch.search(other)``.
    """
    found = self.search(other)
    return found
2109
def __rtruediv__(self, other):
    """As __truediv__, with reversed operands.

    Like __rdiv__: ``other / batch`` returns ``batch.search(other)``.
    """
    found = self.search(other)
    return found
2116
def get(self, enzyme, add=False):
    """Check if enzyme is in batch and return it.

    If add is True and enzyme is not in batch add enzyme to batch.
    If add is False (which is the default) only return enzyme.
    If enzyme is not a RestrictionType or can not be evaluated to
    a RestrictionType, raise a ValueError.  A ValueError is also raised
    when the enzyme is missing from the batch and add is False.
    """
    found = self.format(enzyme)
    if found not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % found.__name__)
        self.add(found)
    return found
2134
2135 - def lambdasplit(self, func):
2136 """Filter enzymes in batch with supplied function. 2137 2138 The new batch will contain only the enzymes for which 2139 func return True. 2140 """ 2141 d = [x for x in filter(func, self)] 2142 new = RestrictionBatch() 2143 new._data = dict(zip(d, [True] * len(d))) 2144 return new
2145
def add_supplier(self, letter):
    """Add all enzymes from a given supplier to batch.

    letter represents the suppliers as defined in the dictionary
    RestrictionDictionary.suppliers
    Returns None.
    Raise a KeyError if letter is not a supplier code.
    """
    entry = suppliers_dict[letter]
    self.suppliers.append(letter)
    # Enzyme names from the supplier table are resolved with eval().
    for x in entry[1]:
        enzyme = eval(x)
        self.add_nocheck(enzyme)
    return None
2159
def current_suppliers(self):
    """List the current suppliers for the restriction batch.

    Return a sorted list of the suppliers which have been used to
    create the batch (first item of each ``suppliers_dict`` entry).
    """
    names = [suppliers_dict[code][0] for code in self.suppliers]
    names.sort()
    return names
2168
def __iadd__(self, other):
    """Override '+=' for use with sets.

    b += other -> add other to b, check the type of other (via
    ``self.add``) and return the batch itself.
    """
    batch = self
    batch.add(other)
    return batch
2176
def __add__(self, other):
    """Override '+' for use with sets.

    b + other -> new RestrictionBatch (an instance of the class of b)
    holding the enzymes of b plus other.  b is not modified.
    """
    klass = self.__class__
    combined = klass(self)
    combined.add(other)
    return combined
2185
def remove(self, other):
    """Remove enzyme from restriction batch.

    Safe set.remove method. Verify that other is a RestrictionType or can
    be evaluated to a RestrictionType.
    Raise a ValueError if other can not be evaluated to a RestrictionType.
    Raise a KeyError if other is not in B.
    """
    enzyme = self.format(other)
    return set.remove(self, enzyme)
2195
def add(self, other):
    """Add a restriction enzyme to the restriction batch.

    Safe set.add method. Verify that other is a RestrictionType or can be
    evaluated to a RestrictionType.
    Raise a ValueError if other can not be evaluated to a RestrictionType.
    """
    enzyme = self.format(other)
    return set.add(self, enzyme)
2204
def add_nocheck(self, other):
    """Add restriction enzyme to batch without checking its type.

    ``other`` is handed to ``set.add`` as is, bypassing ``format``.
    """
    outcome = set.add(self, other)
    return outcome
2208
def format(self, y):
    """Evaluate enzyme (name) and return it (as RestrictionType).

    If y is a RestrictionType return y.
    If str(y) can be evaluated to a RestrictionType return that enzyme.
    Raise a ValueError in all other cases.
    """
    if isinstance(y, RestrictionType):
        return y
    try:
        # Evaluate only once, and always on the string form: the former
        # second call, eval(y), raised TypeError for non-string objects.
        # NOTE(review): eval() runs arbitrary expressions; only pass
        # trusted enzyme names to this method.
        candidate = eval(str(y))
    except (NameError, SyntaxError):
        # Unknown name or text that is not a valid expression.
        pass
    else:
        if isinstance(candidate, RestrictionType):
            return candidate
    raise ValueError('%s is not a RestrictionType' % y.__class__)
2226
def is_restriction(self, y):
    """Return if enzyme (name) is a known enzyme.

    True if y is a RestrictionType or if str(y) evaluates to one.
    False otherwise, including when str(y) is an unknown name or is not
    a valid expression (these used to escape as NameError/SyntaxError,
    unlike in format() which treats them as "not an enzyme").
    """
    if isinstance(y, RestrictionType):
        return True
    try:
        # NOTE(review): eval() runs arbitrary expressions; only pass
        # trusted enzyme names to this method.
        return isinstance(eval(str(y)), RestrictionType)
    except (NameError, SyntaxError):
        return False
2234
def split(self, *classes, **bool):
    """Extract enzymes of a certain class and put in new RestrictionBatch.

    classes are the classes to test each enzyme against. For every class
    a keyword argument named after the class may be given: a true value
    (the default) keeps only the enzymes that are subclasses of it, a
    false value keeps only the enzymes that are not.
    An enzyme is kept only if it satisfies the condition for every class.

    Returns a new RestrictionBatch; self is not modified.

    It works but it is slow, so it has really an interest when splitting
    over multiple conditions.
    """
    def splittest(element):
        # 'bool' is the keyword dictionary here, not the builtin.
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            belongs = issubclass(element, klass)
            if (belongs and not wanted) or (wanted and not belongs):
                return False
        return True

    selected = [enzyme for enzyme in self if splittest(enzyme)]
    new = RestrictionBatch()
    # The batch is a builtin set: fill it directly (no type check needed,
    # the enzymes come from self). Assigning to '_data' alone, as was done
    # before, left the returned batch empty.
    set.update(new, selected)
    # Legacy attribute from the sets.Set era, kept for code that reads it.
    new._data = dict.fromkeys(selected, True)
    return new
2258
def elements(self):
    """List the enzymes of the RestrictionBatch as list of strings.

    Return the names of all the enzymes of the batch, sorted
    alphabetically.
    """
    names = [str(enzyme) for enzyme in self]
    names.sort()
    return names
2265
def as_string(self):
    """List the names of the enzymes of the RestrictionBatch.

    Return a list with the name of each element of the batch, in the
    iteration order of the batch (not sorted).
    """
    return list(map(str, self))
@classmethod
def suppl_codes(cls):
    """Return a dictionary with supplier codes.

    Maps the letter code of each supplier to the first item of its entry
    in the suppliers dictionary.
    """
    codes = {}
    for letter, entry in suppliers_dict.items():
        codes[letter] = entry[0]
    return codes
@classmethod
def show_codes(cls):
    """Print a list of supplier codes.

    One line per supplier, formatted as 'code = name'. Returns None.
    """
    lines = []
    for pair in cls.suppl_codes().items():
        lines.append(' = '.join(pair))
    print('\n'.join(lines))
2288
def search(self, dna, linear=True):
    """Return a dic of cutting sites in the seq for the batch enzymes.

    dna is either a DNA (Seq / MutableSeq) instance or a FormattedSeq.
    linear is only used for a DNA instance; a FormattedSeq carries its
    own linear attribute.

    The result maps each enzyme of the batch to the value returned by its
    own search method. It is also stored in self.mapping and reused on
    the next call as long as the sequence text, the topology and the
    content of the batch are unchanged.

    Raise a TypeError if dna is neither a DNA nor a FormattedSeq.
    """
    #
    # here we replace the search method of the individual enzymes
    # with one unique testing method.
    #
    if not hasattr(self, "already_mapped"):
        # TODO - Why does this happen!
        # Try the "doctest" at the start of PrintFormat.py
        self.already_mapped = None
    # For the searching, we just care about the sequence as a string,
    # if that is the same we can use the cached search results.
    # At the time of writing, Seq == method isn't implemented,
    # and therefore does object identity which is stricter.
    if isinstance(dna, DNA):
        key = (str(dna), linear)
        fseq = None  # only built if the cache can not be used
    elif isinstance(dna, FormattedSeq):
        key = (str(dna), dna.linear)
        fseq = dna
    else:
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
    cached = getattr(self, "mapping", None)
    # The cached mapping is valid only if it was computed for the same
    # sequence AND still covers exactly the enzymes of the batch: add()
    # and remove() do not reset the cache.
    if (key == self.already_mapped and cached is not None and
            set(cached) == set(self)):
        return cached
    if fseq is None:
        fseq = FormattedSeq(dna, linear)
    mapping = {x: x.search(fseq) for x in self}
    # Update the cache only once the whole search has succeeded, so a
    # failure can not leave a new key pointing at an old mapping.
    self.already_mapped = key
    self.mapping = mapping
    return self.mapping
###############################################################################
#                                                                             #
#                       Restriction Analysis                                  #
#                                                                             #
###############################################################################

# Default arguments of Analysis.__init__ (empty sequence, empty batch).
# They are created once, when the module is imported, and are therefore
# shared by every Analysis built without explicit arguments.
_empty_DNA = DNA('')
_restrictionbatch = RestrictionBatch()
class Analysis(RestrictionBatch, PrintFormat):
    """Provide methods for enhanced analysis and pretty printing.

    An Analysis is a RestrictionBatch tied to one sequence. The filtering
    methods (blunt, with_sites, between, ...) all take an optional
    dictionary of results (enzyme -> list of cutting positions) and return
    a new dictionary; they can therefore be chained.

    NOTE(review): the filters test the dictionary with 'if not dct', so an
    empty dictionary is treated like no dictionary at all and the full
    mapping of the analysis is used instead.
    """

    def __init__(self, restrictionbatch=_restrictionbatch, sequence=_empty_DNA,
                 linear=True):
        """Initialize an Analysis with RestrictionBatch and sequence.

        For most of the methods of this class if a dictionary is given it will
        be used as the base to calculate the results.
        If no dictionary is given a new analysis using the RestrictionBatch
        which has been given when the Analysis class has been instantiated,
        will be carried out and used.
        """
        RestrictionBatch.__init__(self, restrictionbatch)
        self.rb = restrictionbatch
        self.sequence = sequence
        self.linear = linear
        # The search is only run when a non-empty sequence is given.
        if self.sequence:
            self.search(self.sequence, self.linear)

    def __repr__(self):
        """Represent ``Analysis`` class as a string."""
        return 'Analysis(%s,%s,%s)' %\
            (repr(self.rb), repr(self.sequence), self.linear)

    def _sub_set(self, wanted):
        """Filter result for keys which are in wanted (PRIVATE).

        Internal use only. Returns a dict.

        Screen the results through wanted set.
        Keep only the results for which the enzymes is in wanted set.
        """
        # It seems that this method is not used in the whole class!
        return {k: v for k, v in self.mapping.items() if k in wanted}

    def _boundaries(self, start, end):
        """Set boundaries to correct values (PRIVATE).

        Format the boundaries for use with the methods that limit the
        search to only part of the sequence given to analyse.

        Values below 1 are shifted by the length of the sequence, then the
        two values are put in increasing order.

        Returns a tuple (start, end, test) where test(start, end, site)
        tells if site lies in the region. When start equals end the region
        is empty (test is never true); this case used to return None and
        made every caller fail while unpacking the result.

        Raise a TypeError if start or end is not an int.
        """
        if not isinstance(start, int):
            raise TypeError('expected int, got %s instead' % type(start))
        if not isinstance(end, int):
            raise TypeError('expected int, got %s instead' % type(end))
        if start < 1:  # Looks like this tries to do python list like indexing
            start += len(self.sequence)
        if end < 1:
            end += len(self.sequence)
        if start > end:
            start, end = end, start
        return start, end, self._test_normal

    def _test_normal(self, start, end, site):
        """Test if site is between start and end (PRIVATE).

        Internal use only. start is included, end is excluded.
        """
        return start <= site < end

    def _test_reverse(self, start, end, site):
        """Test if site is between end and start, for circular sequences (PRIVATE).

        Internal use only.
        """
        return start <= site <= len(self.sequence) or 1 <= site < end

    def format_output(self, dct=None, title='', s1=''):
        """Collect data and pass to PrintFormat.

        If dct is not given the full dictionary is used.
        """
        if not dct:
            dct = self.mapping
        return PrintFormat.format_output(self, dct, title, s1)

    def print_that(self, dct=None, title='', s1=''):
        """Print the output of the analysis.

        If dct is not given the full dictionary is used.
        s1: Title for non-cutting enzymes
        This method prints the output of A.format_output() and it is here
        for backwards compatibility.
        """
        print(self.format_output(dct, title, s1))

    def change(self, **what):
        """Change parameters of print output.

        It is possible to change the width of the shell by setting
        self.ConsoleWidth to what you want.
        self.NameWidth refer to the maximal length of the enzyme name.

        Changing one of these parameters here might not give the results
        you expect. In which case, you can settle back to a 80 columns shell
        or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
        you get it right.

        'sequence', 'rb' and 'linear' can be changed as well; the analysis
        is then initialized or searched again with the new value.

        Raise an AttributeError for 'Cmodulo', 'PrefWidth' (derived values)
        and for any unknown keyword.
        """
        for k, v in what.items():
            if k in ('NameWidth', 'ConsoleWidth'):
                setattr(self, k, v)
                self.Cmodulo = self.ConsoleWidth % self.NameWidth
                self.PrefWidth = self.ConsoleWidth - self.Cmodulo
            elif k == 'sequence':
                setattr(self, 'sequence', v)
                self.search(self.sequence, self.linear)
            elif k == 'rb':
                # Re-initialize in place. The result of __init__ (None) must
                # not be bound to self, or the next keyword of the same call
                # would be applied to None.
                Analysis.__init__(self, v, self.sequence, self.linear)
            elif k == 'linear':
                setattr(self, 'linear', v)
                self.search(self.sequence, v)
            elif k in ('Indent', 'Maxsize'):
                setattr(self, k, v)
            elif k in ('Cmodulo', 'PrefWidth'):
                raise AttributeError(
                    'To change %s, change NameWidth and/or ConsoleWidth' % k)
            else:
                raise AttributeError('Analysis has no attribute %s' % k)
        return

    def full(self, linear=True):
        """Perform analysis with all enzymes of batch and return all results.

        Full Restriction Map of the sequence, as a dictionary.
        The linear argument is not used.
        """
        return self.mapping

    def blunt(self, dct=None):
        """Return only cuts that have blunt ends."""
        if not dct:
            dct = self.mapping
        return {k: v for k, v in dct.items() if k.is_blunt()}

    def overhang5(self, dct=None):
        """Return only cuts that have 5' overhangs."""
        if not dct:
            dct = self.mapping
        return {k: v for k, v in dct.items() if k.is_5overhang()}

    def overhang3(self, dct=None):
        """Return only cuts that have 3' overhangs."""
        if not dct:
            dct = self.mapping
        return {k: v for k, v in dct.items() if k.is_3overhang()}

    def defined(self, dct=None):
        """Return only results from enzymes that produce defined overhangs."""
        if not dct:
            dct = self.mapping
        return {k: v for k, v in dct.items() if k.is_defined()}

    def with_sites(self, dct=None):
        """Return only results from enzyme with at least one cut."""
        if not dct:
            dct = self.mapping
        return {k: v for k, v in dct.items() if v}

    def without_site(self, dct=None):
        """Return only results from enzymes that don't cut the sequence."""
        if not dct:
            dct = self.mapping
        return {k: v for k, v in dct.items() if not v}

    def with_N_sites(self, N, dct=None):
        """Return only results from enzymes that cut the sequence N times."""
        if not dct:
            dct = self.mapping
        return {k: v for k, v in dct.items() if len(v) == N}

    def with_number_list(self, list, dct=None):
        """Return only results from enzymes that cut (x,y,z,...) times."""
        if not dct:
            dct = self.mapping
        return {k: v for k, v in dct.items() if len(v) in list}

    def with_name(self, names, dct=None):
        """Return only results from enzymes which names are listed.

        A BiopythonWarning is issued for every name that is not in
        AllEnzymes and that name is ignored. The names argument itself is
        not modified.
        """
        # Build a new list instead of deleting from names while looping
        # over it: the deletion shifted the items and the name following
        # each unknown one was never checked.
        known = []
        for enzyme in names:
            if enzyme in AllEnzymes:
                known.append(enzyme)
            else:
                warnings.warn("no data for the enzyme: %s" % enzyme,
                              BiopythonWarning)
        if not dct:
            return RestrictionBatch(known).search(self.sequence, self.linear)
        return {n: dct[n] for n in known if n in dct}

    def with_site_size(self, site_size, dct=None):
        """Return only results from enzymes with a given site size.

        Only the enzymes of this analysis whose size attribute equals
        site_size are considered.
        """
        sites = [name for name in self if name.size == site_size]
        if not dct:
            # Use the topology of the analysis, as with_name() does.
            return RestrictionBatch(sites).search(self.sequence, self.linear)
        # Filter on the selected enzymes; the former test, 'k in site_size',
        # looked the enzyme up in an integer and raised a TypeError.
        return {k: v for k, v in dct.items() if k in sites}

    def only_between(self, start, end, dct=None):
        """Return only results from enzymes that only cut within start, end."""
        start, end, test = self._boundaries(start, end)
        if not dct:
            dct = self.mapping
        # Enzymes without any site are dropped as well.
        return {key: sites for key, sites in dct.items()
                if sites and all(test(start, end, site) for site in sites)}

    def between(self, start, end, dct=None):
        """Return only results from enzymes that cut at least within borders.

        Enzymes that cut the sequence at least in between start and end.
        They may cut outside as well.
        """
        start, end, test = self._boundaries(start, end)
        if not dct:
            dct = self.mapping
        return {key: sites for key, sites in dct.items()
                if any(test(start, end, site) for site in sites)}

    def show_only_between(self, start, end, dct=None):
        """Return only results from within start, end.

        Enzymes must cut inside start/end and may also cut outside. However,
        only the cutting positions within start/end will be returned.
        """
        # NOTE(review): the enzymes are selected by between(), which
        # reorders the borders and excludes end, while the positions are
        # filtered here with the borders as given and end included.
        selected = self.between(start, end, dct)
        if start <= end:
            return {k: [vv for vv in v if start <= vv <= end]
                    for k, v in selected.items()}
        return {k: [vv for vv in v if start <= vv or vv <= end]
                for k, v in selected.items()}

    def only_outside(self, start, end, dct=None):
        """Return only results from enzymes that only cut outside start, end.

        Enzymes that cut the sequence outside of the region
        in between start and end but do not cut inside.
        """
        start, end, test = self._boundaries(start, end)
        if not dct:
            dct = self.mapping
        # Enzymes without any site are dropped as well.
        return {key: sites for key, sites in dct.items()
                if sites and not any(test(start, end, site)
                                     for site in sites)}

    def outside(self, start, end, dct=None):
        """Return only results from enzymes that at least cut outside borders.

        Enzymes that cut outside the region in between start and end.
        They may cut inside as well.
        """
        start, end, test = self._boundaries(start, end)
        if not dct:
            dct = self.mapping
        return {key: sites for key, sites in dct.items()
                if any(not test(start, end, site) for site in sites)}

    def do_not_cut(self, start, end, dct=None):
        """Return only results from enzymes that don't cut between borders.

        The result holds the enzymes without any site plus those which
        only cut outside start, end.
        """
        if not dct:
            dct = self.mapping
        # Both parts are computed from the same dictionary; the non-cutters
        # used to be taken from the full mapping even when dct was given.
        d = self.without_site(dct)
        d.update(self.only_outside(start, end, dct))
        return d
#
# The restriction enzyme classes are created dynamically when the module is
# imported. Here is the magic which allows the creation of the
# restriction-enzyme classes.
#
# The reason for the two dictionaries in Restriction_Dictionary,
# one for the types (which will be called pseudo-types as they really
# correspond to the values that instances of RestrictionType can take)
# and one for the enzymes, is efficiency: the bases are evaluated only
# once per pseudo-type.
#
# However Restriction is still a very inefficient module at import. But
# remember that around 660 classes (which is more or less the size of Rebase)
# have to be created dynamically. However, this processing takes place only
# once.
# This inefficiency is however largely compensated by the use of a metaclass
# which provides a very efficient layout for the classes themselves, mostly
# alleviating the need for if/else tests in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO
# switch) as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.items():
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...)
    # The names are not important and are only present to differentiate
    # the keys in the dict. All the pseudo-types are in fact RestrictionType.
    # These names will not be used afterwards and the pseudo-types are not
    # kept in the locals() dictionary. It is therefore impossible to
    # import them.
    # Now, if you have a look at the dictionary, you will see that not all
    # the types are present, as those without corresponding enzymes have
    # been removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First eval the bases: each string is turned into the class of that
    # name defined in this module.
    #
    bases = tuple(eval(x) for x in bases)
    #
    # now create the particular value of RestrictionType for the classes
    # in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        # Now, we go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # we add the enzymes to the corresponding batch.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = RestrictionBatch(CommOnly)
AllEnzymes.update(NonComm)
#
# Now, place the enzymes in locals so they can be imported.
#
names = [str(x) for x in AllEnzymes]
try:
    del x  # noqa
except NameError:
    # Scoping changed in Python 3, the variable isn't leaked
    pass
# At module level locals() is the module namespace, so every enzyme
# becomes a module attribute named after itself.
locals().update(dict(zip(names, AllEnzymes)))
# The public API: the fixed names plus one entry per enzyme.
__all__ = ('FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm') + tuple(names)
# Remove the loop variables and helpers from the module namespace.
del k, enzymes, TYPE, bases, names